import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import com.alibaba.fastjson.JSONObject;
import com.aliyun.odps.udf.UDF;

/**
 * ODPS UDF that parses the composition field of a drug-administration (药监局) page.
 *
 * <p>Given the raw HTML of a page, it extracts the text of the table row that
 * directly follows the row containing the "【感官指标" (sensory indicators) heading.
 *
 * @author abc
 *
 */
public class SpliderOtherComposition extends UDF {

	/** jsoup selector: the {@code tr} immediately following a {@code tr} whose text contains "【感官指标". */
	private static final String SENSORY_ROW_SELECTOR = "tr:contains(【感官指标) +tr";

	/** Full-width semicolon; everything from its last occurrence onward is dropped from the result. */
	private static final String ITEM_DELIMITER = "；";

	/**
	 * Extracts the sensory-indicator text from the given HTML.
	 *
	 * @param html raw HTML of the page; may be {@code null} or blank
	 * @return the text of the row following the "【感官指标" row, truncated at the last
	 *         full-width semicolon when one is present; {@code null} when {@code html}
	 *         is blank, when no such row text is found, or when parsing fails
	 */
	public String evaluate(String html) {
		if (StringUtils.isBlank(html)) {
			return null;
		}
		try {
			Document doc = Jsoup.parse(html);
			// Text of the row right after the sensory-indicator heading row.
			String rowText = doc.select(SENSORY_ROW_SELECTOR).text();
			if (rowText.isEmpty()) {
				// No matching row (or it has no text): nothing to report.
				return null;
			}
			int cut = rowText.lastIndexOf(ITEM_DELIMITER);
			// lastIndexOf returns -1 when the delimiter is absent; substring(0, -1) would
			// throw, so keep the text as-is in that case instead of losing the row.
			return cut < 0 ? rowText : rowText.substring(0, cut);
		} catch (RuntimeException e) {
			// Best effort: a single malformed input must not abort the whole job.
			e.printStackTrace();
			return null;
		}
	}
}
